In [3]:
import shutil
import os
In [4]:
# Archive the contents of ./data/world into ./data/world.zip.
# root_dir is passed explicitly: when it is omitted, make_archive falls back to
# the current working directory, so the whole notebook folder (including the
# ./data directory the archive is being written into) would be archived instead.
shutil.make_archive(
    os.path.join(os.curdir, "data", "world"),  # archive path, without the extension
    "zip",  # archive format to write (".zip" => "zip", ".tar.gz" => "gztar")
    os.path.join(os.curdir, "data", "world"),  # directory whose contents are archived
)
Out[4]:
In [5]:
# Archive the contents of ./data/world into ./data/world.tar.gz.
# root_dir is passed explicitly: when it is omitted, make_archive falls back to
# the current working directory, so the whole notebook folder (including the
# ./data directory the archive is being written into) would be archived instead.
shutil.make_archive(
    os.path.join(os.curdir, "data", "world"),  # archive path, without the extension
    "gztar",  # gzip-compressed tar (".tar.gz")
    os.path.join(os.curdir, "data", "world"),  # directory whose contents are archived
)
Out[5]:
In [ ]:
# Write one CSV of cities per country, directly under ./data/world/.
# NOTE: this cell reads country_df, city_df and city_groups, which are created
# by cells further down the notebook; those cells must be run first.

# Build the lookup once, outside the loop, so each membership test is O(1)
# instead of recomputing and scanning the unique-code array for every country.
country_codes_with_cities = set(city_df["CountryCode"].unique())

for index, row in country_df.iterrows():
    country_code = row["Code"]
    country_name = row["Name"]
    # Countries without any city rows have no group; get_group() would raise
    # KeyError for them, so they are skipped.
    if country_code in country_codes_with_cities:
        one_city_df = city_groups.get_group(country_code)
        one_city_df.to_csv(
            os.path.join(
                os.curdir,
                "data",
                "world",
                "{country_name}.csv".format(country_name=country_name),
            )
        )
In [6]:
import getpass

import pandas as pd  # used by read_sql below; no pandas import is visible elsewhere in the notebook
import pymysql

# Load the City and Country tables of the "world" database into DataFrames.
#
# The password is deliberately not stored in the notebook: it is taken from the
# WORLD_DB_PASSWORD environment variable, or prompted for when that is unset.
# NOTE(review): the password that used to be hardcoded here has been published
# with the notebook and should be rotated.
db_password = os.environ.get("WORLD_DB_PASSWORD") or getpass.getpass("MySQL password: ")

# Keyword arguments are required: PyMySQL 1.0+ no longer accepts positional
# connection parameters.
db = pymysql.connect(
    host="db.fastcamp.us",
    user="root",
    password=db_password,
    database="world",
    charset='utf8',
)
# `db` is left open because it is a notebook-level name other cells may reuse;
# call db.close() once the connection is no longer needed.
city_df = pd.read_sql("SELECT * FROM City;", db)
country_df = pd.read_sql("SELECT * FROM Country;", db)
In [24]:
# Rebuild ./data/world from scratch: one sub-folder per continent holding one
# CSV of cities per country, plus a <continent>.tar.gz archive of each folder.
# Reads country_df and city_df, which are loaded by the database cell.
data_dir = os.path.join(os.curdir, "data")
world_dir = os.path.join(data_dir, "world")

# Remove any previous export so files from an earlier run do not linger.
if os.path.isdir(data_dir):
    print("./data/폴더를 삭제합니다.")
    shutil.rmtree(data_dir)
print("./data/폴더를 생성합니다.")
os.makedirs(world_dir)  # creates ./data as well

# country_df grouped by continent -> one folder per continent;
# city_df grouped by country code -> one CSV per country inside that folder.
continent_groups = country_df.groupby("Continent")
city_groups = city_df.groupby("CountryCode")

# Some country codes (the original note cites "ATA") have no rows in city_df.
# get_group() would raise KeyError for those, so they are filtered out below.
unique_country_code_in_city = city_df["CountryCode"].unique()
country_codes_with_cities = set(unique_country_code_in_city)  # O(1) membership tests

for continent_name in country_df["Continent"].unique():
    continent_dir = os.path.join(world_dir, continent_name)
    os.makedirs(continent_dir)
    continent_df = continent_groups.get_group(continent_name)

    # For every country of this continent, look up its cities by "Code" and
    # write them into the continent folder.
    for index, row in continent_df.iterrows():
        country_code = row["Code"]
        country_name = row["Name"]
        if country_code in country_codes_with_cities:
            df = city_groups.get_group(country_code)
            df.to_csv(
                os.path.join(
                    continent_dir,
                    "{country_name}.csv".format(country_name=country_name),
                )
            )

    # Compress the finished continent folder into ./data/world/<continent>.tar.gz.
    shutil.make_archive(
        continent_dir,  # archive path, without the extension
        "gztar",
        continent_dir,  # directory whose contents are archived
    )
In [15]:
# Print, for every continent, how many entries its export folder under
# ./data/world/ contains.
for continent_name in country_df["Continent"].unique():
    continent_dir = os.path.join(os.curdir, "data", "world", continent_name)
    country_count = len(os.listdir(continent_dir))
    print((continent_name, country_count))
In [26]:
# Extract the Asia archive into ./Asia.
shutil.unpack_archive(
    filename="./data/world/Asia.tar.gz",
    extract_dir="./Asia",
)